/*
This code produces tabs on the matches generated by the fuzzy matching procedure that I developed
Let's see what I have and what I may need to add

Some thoughts on strategy:
	- Focus on just the first round of loans first
		- By what is on the SBA loan info
		- size
		- Type of entity
		- geography
		- Who is not matches
	- Matched to what:
		- EIN
		- Ownership return
		- employees
	- Matched how:
		- Some sense of possible error rate as well
	- Other things to consider
		- Compare wages and loans to each other
		- perhaps something with the info from schedule k
*/

do "${dodir}/make_globals.do"

//Add EINS to the full PPP data set and then make the dataset unique by tin
use "${datadir}/sba/public_all", clear

merge 1:1 loannumber using "${datadir}/all_match", nogen

keep loannumber dateapproved processingmethod borrowername borroweraddress borrowercity borrowerstate borrowerzip /// 
initialapprovalamount currentapprovalamount undisbursedamount forgivenessamount jobsreported naicscode businesstype /// 
ein id match_type

/*
Data is probably good enough for now,
	- The improvent to consider is summing across multiple loans to the same number
	- Should just do that here and now
*/

//Make unique by id and processingmethod
gsort id dateapproved
gsort id processingmethod dateapproved
duplicates drop id loannumber processingmethod if id != ., force

/*
Now start to do some tabulations
*/

gen matched = id != .

tab matched if processingmethod == "PPP"

tab matched [aw=forgivenessamount] if processingmethod == "PPP"


//By the SBA entity type
preserve 
gcollapse (count) loannumber (sum) forgivenessamount jobsreported if processingmethod == "PPP", by(matched businesstype)
export excel "${outdir}/sum_stats/match_summary.xlsx", firstrow(variables) sheet("By SBA Type", replace)
restore

preserve 
gcollapse (count) loannumber (sum) forgivenessamount jobsreported if processingmethod == "PPP", by(businesstype)
export excel "${outdir}/sum_stats/match_summary.xlsx", firstrow(variables) sheet("By SBA Type Totals", replace)
restore

//clean up the businesstype variables for scale-ups
gen cond_bus_type = businesstype
replace cond_bus_type = "non-profit" if inlist(businesstype, "501(c)3 â Non Profit", "501(c)19 â Non Profit Veterans","501(c)3 â Non Profit", "501(c)6 â Non Profit Membership", "Non-Profit Childcare Center", "Non-Profit Organization", "Tribal Concerns", "501(c) â Non Profit except 3,4,6,")
replace cond_bus_type = "sole" if inlist(businesstype, "Independent Contractors", "Qualified Joint-Venture (spouses)", "Rollover as Business Start-Ups (ROB", "Self-Employed Individuals", "Single Member LLC", "Sole Proprietorship")
replace cond_bus_type = "partnership" if inlist(businesstype, "Cooperative", "Housing Co-op", "Joint Venture", "Limited Liability Partnership", "Partnership")
replace cond_bus_type = "c_corp" if inlist(businesstype, "Corporation")
replace cond_bus_type = "s_corp" if inlist(businesstype, "Subchapter S Corporation")
replace cond_bus_type = "ownership" if inlist(businesstype, "Employee Stock Ownership Plan(ESOP)", "Tenant in Common", "Trust")
replace cond_bus_type = "prof_assoc" if inlist(businesstype, "Professional Association")
replace cond_bus_type = "other" if inlist(businesstype, "")
replace cond_bus_type = "llc" if inlist(businesstype, "Limited  Liability Company(LLC)")

preserve 
gcollapse (count) loannumber (sum) forgivenessamount jobsreported if processingmethod == "PPP", by(matched cond_bus_type)
export excel "${outdir}/sum_stats/scale_up.xlsx", firstrow(variables) sheet("ein", replace)

reshape wide loannumber forgivenessamount jobsreported, i(cond_bus_type) j(matched)

gen loan_ein_wt = (loannumber1 + loannumber0) / loannumber1
gen dollar_ein_wt = (forgivenessamount1 + forgivenessamount0) / forgivenessamount1
gen job_ein_wt = (jobsreported1 + jobsreported0) / jobsreported1

keep cond_bus_type *wt

save "${datadir}/scale_ups/ein", replace

restore

//By how it was matched
preserve 
gcollapse (count) loannumber (sum) forgivenessamount jobsreported if processingmethod == "PPP", by(match_type)
export excel "${outdir}/sum_stats/match_type_summary.xlsx", firstrow(variables) sheet("raw", replace)
restore

//Now do totals by size 
gen firm_size = 1 if jobsreported >= 1 & jobsreported <= 9
replace firm_size = 2 if jobsreported > 9 & jobsreported <= 49
replace firm_size = 3 if jobsreported > 49 & jobsreported <= 99
replace firm_size = 4 if jobsreported > 99 & jobsreported <= 249
replace firm_size = 5 if jobsreported > 249

//First round
preserve 
gcollapse (count) loannumber (sum) forgivenessamount jobsreported if processingmethod == "PPP", by(matched firm_size)
export excel "${outdir}/sum_stats/match_summary_size.xlsx", firstrow(variables) sheet("Round 1", replace)
restore

//Second round
preserve 
gcollapse (count) loannumber (sum) forgivenessamount jobsreported if processingmethod == "PPS", by(matched firm_size)
export excel "${outdir}/sum_stats/match_summary_size.xlsx", firstrow(variables) sheet("Round 2", replace)
restore
